#### BRIDGE: Telemed COVID Paper ####
#### Telemed Data Sharing ####

library("Hmisc")
library("tidyverse")
library("lubridate")
library("childsds")
library("tidytext")
library("readxl")

#manually makes a mode-estimating function
modeest <- function(x) {
  ux <- unique(x)
  ux[which.max(tabulate(match(x, ux)))]
}

#### Read in data - original data - not provided ####
put <- 'originaldatapath'
monthly1 <- read_csv(paste0(put, "monthly1.csv"))
monthly1 <- monthly1 %>% dplyr::select(-...1)

pap_crc_denom <- read_delim('originaldata', delim = "|", guess_max=10000) %>% rename_all(tolower) 
clinic<-read_delim("originaldata", delim="|", guess_max=10000) %>% rename_all(tolower)
# Clean up 
clinic <- clinic %>% select(-pap_denominator_study, -crc_denominator_study)
clinic1 <- clinic %>%
  left_join(pap_crc_denom) %>%
  group_by(delivery_site_id_masked) %>%
  filter(row_number()==1)

monthly_level <- monthly1 %>% select(month_start, delivery_site_id_masked, thencs, avth_encs, avencs, avth_patients,
                                     pct_avth_female)  #monthly1
clinic_level <- clinic1 %>% select(delivery_site_id_masked, ruca_recode2014, primary_ruca_2010, study_patients,
          advance_avth_patients, pct_pat_hispanic, pct_pat_other_race, pct_pat_white, mddo_providers, pct_mixed_insurance,
          pct_pat_ge65, pct_pat_black, pct_pat_charlson_5_6, pct_pat_charlson_7plus, pct_pat_50_64, pct_pat_30_49,
          pct_pat_medicaid_only, pct_pat_uninsured_only, pct_pat_medicare_only, pct_pat_female) #clinic1
write.csv(S2_fig1, file = paste0(put, "S2_fig1.csv"))
#### Datasets you need ####
put <- 'yourpathhere'
monthly_level <- read_csv(paste0(put, "monthly_level.csv"))
monthly_level <- monthly_level %>% dplyr::select(-...1)
clinic_level <- read_csv(paste0(put, "clinic_level.csv"))
clinic_level <- clinic_level %>% dplyr::select(-...1)
S2_fig1 <- read_csv(paste0(put, "S2_fig1.csv"))
S2_fig1 <- S2_fig1 %>% dplyr::select(-...1)

#### Figure 1 ####
selected_2020 <- monthly_level %>%
  select(month_start, delivery_site_id_masked, thencs, avth_encs) %>%
  filter(month_start > "2019-12-01") %>%
  group_by(delivery_site_id_masked) %>%
  mutate(perc_th = thencs/avth_encs*100)
selected_2020 <- selected_2020 %>% arrange(month_start) %>% 
  mutate(time = dense_rank(month_start)-1)
#make sure all sites are in all months and have visits each month
selected_2020_1 <- selected_2020 %>% 
  mutate(perc_th1 = ifelse(is.na(perc_th)==TRUE, 0, perc_th)) %>%
  group_by(delivery_site_id_masked) %>%
  mutate(num_rows = n()) %>%
  filter(num_rows == 18)
s2020_3 <- selected_2020_1 %>%
  group_by(delivery_site_id_masked) %>%
  mutate(no_visit = ifelse(avth_encs <=10, 1, 0)) %>% 
  filter(no_visit == 0) %>%
  filter(n()==18) 
avth_m <- s2020_3 %>%
  group_by(delivery_site_id_masked, month_start) %>%
  select(delivery_site_id_masked, avth_encs, thencs) %>%
  mutate(month = month(month_start),
         year = year(month_start)) %>%
  group_by(month_start, year, month) %>%
  summarise(encounter = sum(avth_encs),
            telemed = sum(thencs),
            percent_th = telemed/encounter*100)
#Plot
avth_plot <- ggplot(avth_m, aes(month_start, percent_th)) + #group = year, color = year
  geom_line(size = 1.5) +
  geom_vline(xintercept = as.Date("2021-01-01")) +
  labs(title = "Percent of Telemedicine Visits (Out of All Visits)") + 
  theme_bw() +
  scale_y_continuous(name = "Percent of Visits", lim = c(0,100)) +
  xlab(label = "Month/Year") +
  scale_x_date(date_breaks = "3 month", date_labels =  "%b %Y") 
avth_plot
ggsave(avth_plot, file = "pct_tele.png", path = "yourpathhere/", width = 7, height = 4, device='tiff', dpi=400)

#### Create Clusters ####
selected <- monthly_level %>%
  select(month_start, delivery_site_id_masked, thencs, avth_encs) %>%
  filter(month_start > "2020-03-01") %>%
  group_by(delivery_site_id_masked) %>%
  mutate(perc_th = thencs/avth_encs*100)
selected <- selected %>% arrange(month_start) %>% 
  mutate(time = dense_rank(month_start)-1)
selected1 <- selected %>% mutate(perc_th1 = ifelse(is.na(perc_th)==TRUE, 0, perc_th)) %>%
  group_by(delivery_site_id_masked) %>%
  mutate(num_rows = n()) %>%
  filter(num_rows == 15)
s1 <- selected1 %>%
  left_join(monthly_level) %>%
  left_join(clinic_level) %>%
  mutate(UR = ifelse(ruca_recode2014 %in% c("R", "ST"), "Rural", 
                     ifelse(ruca_recode2014 %in% c("UA","UC"), "Urban", 
                            ifelse(primary_ruca_2010 == "Metropolitan area core: primary flow within an urbanized area (UA)", "Urban",
                                   ifelse(primary_ruca_2010 == "Rural areas: primary flow to a tract outside a UA or UC", "Rural", NA)))),
         LS = ifelse(study_patients > 4922, "Large", "Small")) 
#delete clinic if they have any month with less than 11 visits  #
s3 <- s1 %>%
  group_by(delivery_site_id_masked) %>%
  mutate(no_visit = ifelse(avth_encs <=10, 1, 0)) %>% 
  filter(no_visit == 0) %>%
  filter(n()==15)
#LCGA ####
library(flexmix)
# set random number seed:
set.seed(0111)
# estimate lcga models with 1-7 classes: *6 was chosen by calculating AIC, BIC, ICL (not shown in code)
lcgaMix <-stepFlexmix(.~ .|delivery_site_id_masked, k = 6, nrep = 50, 
                      model = FLXMRglmfix(perc_th1 ~ time, varFix=TRUE), 
                      data = s3, control = list(iter.max = 500, minprior = 0))

f <-clusters(lcgaMix)
s2 <- s3 %>% cbind(f) %>% rename(cluster = ...34)
# Reconfiguring clusters
s2 <- s2 %>% mutate(new_cluster_name = ifelse(cluster == 2, "A",
                                       ifelse(cluster == 5, "D",
                                       ifelse(cluster == 3, "B",
                                       ifelse(cluster == 1, "E",
                                       ifelse(cluster == 4, "C", 
                                       ifelse(cluster == 6, "F", NA)))))))
s5 <- s2 %>% select(delivery_site_id_masked, cluster, new_cluster_name) %>% 
  filter(row_number()==1)

clinic_demo <- s5 %>% left_join(monthly1) %>% left_join(clinic1) %>%
  mutate(UR = ifelse(ruca_recode2014 %in% c("R", "ST"), "Rural", 
              ifelse(ruca_recode2014 %in% c("UA","UC"), "Urban", 
              ifelse(primary_ruca_2010 == "Metropolitan area core: primary flow within an urbanized area (UA)", "Urban",
              ifelse(primary_ruca_2010 == "Rural areas: primary flow to a tract outside a UA or UC", "Rural", NA)))),
         LS = ifelse(study_patients > 4922, "Large", "Small")) 
clinic_demo <- clinic_demo %>% ungroup()
#### Table 1 ####
cluster_total <- clinic_demo %>%
  ungroup() %>%
  summarise(female = mean(pct_pat_female),
            hisp = mean(pct_pat_hispanic),
            p30 = mean(pct_pat_30_49),
            p50 = mean(pct_pat_50_64),
            elderly = mean(pct_pat_ge65),
            black = mean(pct_pat_black),
            pwhite = mean(pct_pat_white),
            potherrace = mean(pct_pat_other_race),
            phisp = mean(pct_pat_hispanic),
            char5_6 = mean(pct_pat_charlson_5_6),
            char7 = mean(pct_pat_charlson_7plus),
            medi = mean(pct_pat_medicaid_only),
            unin = mean(pct_pat_uninsured_only),
            n_rural = sum(UR == "Rural", na.rm = TRUE)/30,
            n_urban = sum(UR == "Urban", na.rm = TRUE)/30,
            n_large = sum(LS == "Large", na.rm = TRUE)/30,
            n_small = sum(LS == "Small", na.rm = TRUE)/30,
            pmddo = mean(mddo_providers, na.rm = T),
            av_encs = sum(avencs),
            th_encs = sum(thencs),
            total_encs = sum(avth_encs),
            total_pats = mean(avth_patients))


#### Table 2 ####
f3 <- s2 %>%
  group_by(new_cluster_name) %>%
  summarise(female = mean(pct_pat_female),
            hisp = mean(pct_pat_hispanic),
            elderly = mean(pct_pat_ge65),
            black = mean(pct_pat_black),
            char5_6 = mean(pct_pat_charlson_5_6),
            char7 = mean(pct_pat_charlson_7plus),
            medi = mean(pct_pat_medicaid_only),
            unin = mean(pct_pat_uninsured_only),
            n_rural = sum(UR == "Rural", na.rm = TRUE)/15,
            n_urban = sum(UR == "Urban", na.rm = TRUE)/15,
            n_large = sum(LS == "Large", na.rm = TRUE)/15,
            n_small = sum(LS == "Small", na.rm = TRUE)/15,
            av_encs = sum(avencs),
            th_encs = sum(thencs),
            total_encs = sum(avth_encs),
            total_pats = mean(avth_patients))
group_time <- clinic_demo %>%
  filter(month_start > "2019-02-01" & month_start < '2020-03-01') %>%
  group_by(new_cluster_name) %>%
  summarise(th = sum(thencs),
            enc = sum(avth_encs),
            perc = (th/enc)*100,
            th_mean = mean(thencs))
#last row table 2
pats_cluster <- s2 %>% group_by(delivery_site_id_masked) %>%
  summarise(num_pats = first(advance_avth_patients),
            new_cluster_name = first(new_cluster_name)) %>%
  group_by(new_cluster_name) %>%
  summarise(q1pats = quantile(num_pats, 0.25),
            median = median(num_pats),
            q3pats = quantile(num_pats, 0.75))
larger_time <- clinic_demo %>%
  filter(month_start > "2019-02-01" & month_start < '2020-03-01') %>%
  summarise(th = sum(thencs),
            enc = sum(avth_encs),
            perc = (th/enc)*100,
            th_mean = mean(thencs))
#totals column
cluster_total <- s2 %>%
  ungroup() %>%
  summarise(female = mean(pct_pat_female),
            hisp = mean(pct_pat_hispanic),
            elderly = mean(pct_pat_ge65),
            black = mean(pct_pat_black),
            char5_6 = mean(pct_pat_charlson_5_6),
            char7 = mean(pct_pat_charlson_7plus),
            medi = mean(pct_pat_medicaid_only),
            unin = mean(pct_pat_uninsured_only),
            n_rural = sum(UR == "Rural", na.rm = TRUE)/15,
            n_urban = sum(UR == "Urban", na.rm = TRUE)/15,
            n_large = sum(LS == "Large", na.rm = TRUE)/15,
            n_small = sum(LS == "Small", na.rm = TRUE)/15,
            av_encs = sum(avencs),
            th_encs = sum(thencs),
            total_encs = sum(avth_encs),
            total_pats = mean(avth_patients))

last_rowtotal <- s2 %>% group_by(delivery_site_id_masked) %>%
  summarise(num_pats = first(advance_avth_patients),
            new_cluster_name = first(new_cluster_name)) %>%
  ungroup() %>%
  summarise(q1pats = quantile(num_pats, 0.25),
            median = median(num_pats),
            q3pats = quantile(num_pats, 0.75))

#### Figure 2 ####
s2 <- s2 %>% mutate(new_cluster_name = ifelse(cluster == 2, "A",
                                              ifelse(cluster == 5, "B",
                                                     ifelse(cluster == 3, "C",
                                                            ifelse(cluster == 1, "D",
                                                                   ifelse(cluster == 4, "E", 
                                                                          ifelse(cluster == 6, "F", NA)))))))
s5 <- s2 %>% select(delivery_site_id_masked, cluster, new_cluster_name) %>% filter(row_number()==1)
S2_fig1 <- s2 %>% select(month_start, perc_th, delivery_site_id_masked, new_cluster_name)

S2_fig1 <- S2_fig1 %>% mutate(new_cluster_name1 = ifelse(new_cluster_name == "A", "A (n=35)",
                                                         ifelse(new_cluster_name == "B", "B (n=34)",
                                                                ifelse(new_cluster_name == "C", "C (n=43)",
                                                                       ifelse(new_cluster_name == "D", "D (n=40)",
                                                                              ifelse(new_cluster_name == "E", "E (n=37)",
                                                                                     ifelse(new_cluster_name == "F", "F (n=14)", NA)))))))
six <- ggplot(S2_fig1, aes(month_start, perc_th, group= delivery_site_id_masked, color = new_cluster_name)) +
  facet_wrap(~ new_cluster_name1) +
  geom_point() +
  geom_line() +
  xlab("Month/Year") +
  ylab("Percent Telehealth Visits") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, vjust = 0.5, hjust=0.60), legend.position="right") +
  scale_color_manual(labels = c("A: No/Low uptake", "B: Medium uptake; steady decline", "C: Low/Medium uptake; steady decline",
                                "D: Medium/High uptake; steady decline", "E: High uptake; steady decline", "F: High uptake; remain high"), 
                     values = c("#00798c", "#d1495b", "#edae49", "#66a182", "#2e4057", "grey")) +
  
  labs(color='Cluster') +
  ylim(0, 100) 
#geom_smooth(method=lm, se=FALSE, col='red', size=1, span = 0.1)
six

#Black and white
six <- ggplot(S2_fig1, aes(month_start, perc_th, group= delivery_site_id_masked, color = new_cluster_name)) +
  facet_wrap(~ new_cluster_name1) +
  geom_point() +
  geom_line() +
  xlab("Month/Year") +
  ylab("Percent Telehealth Visits") +
  theme_bw() +
  guides(color = guide_legend(override.aes = list(color = NA, fill = NA))) +
  theme(axis.text.x = element_text(angle = 45, vjust = 0.5, hjust=0.60), legend.position="right",
        legend.title.align=0.25) +
  scale_color_manual(labels = c("A: No/Low uptake", "B: Medium uptake; steady decline", "C: Low/Medium uptake; steady decline",
                                "D: Medium/High uptake; steady decline", "E: High uptake; steady decline", "F: High uptake; remain high"), 
                     values = c("grey1", "grey1", "grey1", "grey1", "grey1", "grey1")) +
  
  labs(color='Cluster') +
  ylim(0, 100) 
six



#### Table 3  ----------------------------------------------------------------
selected_t3 <- monthly_level %>%
  select(month_start, delivery_site_id_masked, thencs, avth_encs) %>%
  group_by(delivery_site_id_masked) %>%
  mutate(perc_th = thencs/avth_encs*100)

selected_t3 <- selected_t3 %>% arrange(month_start) %>% 
  mutate(time = dense_rank(month_start)-1)

selected_t31 <- selected_t3 %>% mutate(perc_th1 = ifelse(is.na(perc_th)==TRUE, 0, perc_th)) %>%
  group_by(delivery_site_id_masked) 

  
by_cluster <- s5 %>% left_join(selected_t31)
new_clust <- by_cluster %>% 
  mutate(mixed_cluster = ifelse(new_cluster_name == "A" | new_cluster_name == "B", 1,
                                ifelse(new_cluster_name == "C" | new_cluster_name == "E", 2, 
                                       ifelse(new_cluster_name == "D", 3, 4))))

#feb 2020 avg % of telemed by mixed methods group:
feb <- new_clust %>% filter(month_start == "2020-02-01") %>%
  group_by(mixed_cluster) %>% summarise(pct_th = mean(perc_th1), sd_th = sd(perc_th1))
#april 2020 avg % of telemed by mixed methods group:
apr <- new_clust %>% filter(month_start == "2020-04-01") %>%
  group_by(mixed_cluster) %>% summarise(pct_th = mean(perc_th1), sd_th = sd(perc_th1))
#nov 2020 avg % of telemed by mixed methods group:
nov <- new_clust %>% filter(month_start == "2020-11-01") %>%
  group_by(mixed_cluster) %>% summarise(pct_th = mean(perc_th1), sd_th = sd(perc_th1))
#june 2021 avg % of telemed by mixed methods group:
jun <- new_clust %>% filter(month_start == "2021-06-01") %>%
  group_by(mixed_cluster) %>% summarise(pct_th = mean(perc_th1), sd_th = sd(perc_th1))
#April 2020 - june 2021 avg % of telemed by mixed methods group:
apr_jun <- new_clust %>% filter(month_start >= "2020-04-01" & month_start <= "2021-06-01") %>%
  group_by(mixed_cluster) %>% summarise(pct_th = mean(perc_th1), sd_th = sd(perc_th1))
